{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "import time\n",
    "import random\n",
    "import dynet as dy\n",
    "import numpy as np\n",
    "\n",
    "# Functions to read in the corpus\n",
    "w2i = defaultdict(lambda: len(w2i))\n",
    "t2i = defaultdict(lambda: len(t2i))\n",
    "UNK = w2i[\"<unk>\"]\n",
    "def read_dataset(filename):\n",
    "  with open(filename, \"r\") as f:\n",
    "    for line in f:\n",
    "      tag, words = line.lower().strip().split(\" ||| \")\n",
    "      yield ([w2i[x] for x in words.split(\" \")], t2i[tag])\n",
    "\n",
    "# Read in the data\n",
    "train = list(read_dataset(\"../data/classes/train.txt\"))\n",
    "w2i = defaultdict(lambda: UNK, w2i)\n",
    "dev = list(read_dataset(\"../data/classes/test.txt\"))\n",
    "nwords = len(w2i)\n",
    "ntags = len(t2i)\n",
    "\n",
    "# Start DyNet and define trainer\n",
    "model = dy.Model()\n",
    "trainer = dy.AdamTrainer(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Define the model\n",
    "EMB_SIZE = 64\n",
    "W_emb = model.add_lookup_parameters((nwords, EMB_SIZE)) # Word embeddings\n",
    "W_sm = model.add_parameters((ntags, EMB_SIZE))          # Softmax weights\n",
    "b_sm = model.add_parameters((ntags))                      # Softmax bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# A function to calculate scores for one value\n",
    "def calc_scores(words):\n",
    "  dy.renew_cg()\n",
    "  cbow = dy.esum([dy.lookup(W_emb, x) for x in words])\n",
    "  W_sm_exp = dy.parameter(W_sm)\n",
    "  b_sm_exp = dy.parameter(b_sm)\n",
    "  return W_sm_exp * cbow + b_sm_exp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for ITER in range(100):\n",
    "  # Perform training\n",
    "  random.shuffle(train)\n",
    "  train_loss = 0.0\n",
    "  start = time.time()\n",
    "  for words, tag in train:\n",
    "    my_loss = dy.pickneglogsoftmax(calc_scores(words), tag)\n",
    "    train_loss += my_loss.value()\n",
    "    my_loss.backward()\n",
    "    trainer.update()\n",
    "  print(\"iter %r: train loss/sent=%.4f, time=%.2fs\" % (ITER, train_loss/len(train), time.time()-start))\n",
    "  # Perform testing\n",
    "  test_correct = 0.0\n",
    "  for words, tag in dev:\n",
    "    scores = calc_scores(words).npvalue()\n",
    "    predict = np.argmax(scores)\n",
    "    if predict == tag:\n",
    "      test_correct += 1\n",
    "  print(\"iter %r: test acc=%.4f\" % (ITER, test_correct/len(dev)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
